Introduction

Here we pre-process the Malaria Genomics Project Pf3k project data for both IBD analysis within isoRelate and to construct a processed ground truth data set using the known mixture clones present in the data.

Up to this point the following steps have been applied to the VCF files obtained from Pf3k website.

For each chromosome VCF file we applied the following filters using vcftools:

After we have processed each chromosome VCF we concatenate them together.

set -e
vcfdir=/path/to/malaria_gen5/vcf_files
outdir=./raw_data/
tempdir=~/tmp/
# step 1, select only SNPs + VQSLOD > 0, regiontype = core
vcf_chrom=$(find $vcfdir -regex ".*\.*[0-9][0-9]_v3.*.vcf.gz" | sort -u)

for chr in $vcf_chrom 
do
    (stem=$(basename $chr .vcf.gz)
    echo $stem
    vcftools --gzvcf $chr \
    --remove-indels \
    --min-alleles  2 \
    --max-alleles 2 \
    --remove-filtered-all \
    --recode \
    --recode-INFO-all \
    --stdout | bgzip -c > ${tempdir}${stem}.vcf.gz 
    tabix -p vcf ${tempdir}${stem}.vcf.gz) &

done
wait

filtered_vcf=$(find $tempdir -name "*.vcf.gz" | sort -u)

bcftools concat -Oz -o ${outdir}malaria_gen5_biallelic_snps.vcf.gz $filtered_vcf

Then we convert the VCF to the GDS format keeping only the tags of interest.

library(SeqArray)
vcf_file <- "raw_data/malaria_gen5_biallelic_snps.vcf.gz"

vcf_header <- seqVCF_Header(vcf_file)

# recode header format for AD from R to .
vcf_header$format$Number[vcf_header$ID == "AD"] <- "."

# info columns to keep
info.import <- c("AC", "AF", "AN", "BaseQRankSum", "ClippingRankSum",
                 "DP", "FS", "InbreedingCoeff", "MQ", "MQRankSum",
                 "QD", "ReadPosRankSum",
                 "SOR", "VQSLOD", "SNPEFF_AMINO_ACID_CHANGE",
                 "SNPEFF_CODON_CHANGE", "SNPEFF_EFFECT", "SNPEFF_EXON_ID",
                 "SNPEFF_FUNCTIONAL_CLASS", "SNPEFF_GENE_BIOTYPE", 
                 "SNPEFF_GENE_NAME", "SNPEFF_IMPACT", "SNPEFF_TRANSCRIPT_ID")

# format columns to keep
fmt.info <- c("AD", "DP", "GQ", "GT", "PL", "PGT",  "PID")

seqVCF2GDS(vcf_file, "processed_data/malaria_gen5.gds",
           header = vcf_header, 
           info.import = info.import, 
           fmt.import = fmt.info)

We now are ready to use the GDS file for analysis in R.

suppressPackageStartupMessages(library(SeqArray))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(moimix))

# use readonly = FALSE to add sample annotations
mgen <- seqOpen("processed_data/malaria_gen5.gds", readonly = FALSE)

The final variant set consists of 1057870 SNVs segregating by chromosome as follows:

knitr::kable(data.frame(chr = seqGetData(mgen, "chromosome") ) 
             %>% count(chr),
             caption = "SNV counts by chromosome")
SNV counts by chromosome
chr n
Pf3D7_01_v3 21475
Pf3D7_02_v3 35168
Pf3D7_03_v3 43199
Pf3D7_04_v3 45051
Pf3D7_05_v3 61349
Pf3D7_06_v3 55371
Pf3D7_07_v3 57980
Pf3D7_08_v3 58741
Pf3D7_09_v3 73279
Pf3D7_10_v3 72528
Pf3D7_11_v3 98484
Pf3D7_12_v3 99409
Pf3D7_13_v3 151941
Pf3D7_14_v3 183895

We have also processed the sample metadata to see how the samples segregate by geography. Samples with missing country variable correspond to the lab-strains from the complexity of infection and genetic crosses study.

# load metadata 
library(readr)
mgen_meta <- read_rds("processed_data/mgen_clean.rds")

There are 2512 total field isolates and the dataset can be broadly grouped into three broader geographic locations:

  1. West Africa: Senegal, The Gambia, Guinea, Mali, Ghana, Nigeria
  2. Central Africa: DR Congo, Malawi
  3. South-east Asia: Bangladesh, Cambodia, Vietnam, Thailand, Myanmar, Laos
west_africa <- c("The Gambia", "Ghana", "Guinea", "Mali", "Nigeria", "Senegal")
central_africa <- c("DR of the Congo", "Malawi")
se_asia <- c("Bangladesh", "Cambodia", "Laos", "Myanmar", "Thailand", "Vietnam")

mgen_meta$region <- NA
mgen_meta$region[mgen_meta$country %in% west_africa] <- "West Africa"
mgen_meta$region[mgen_meta$country %in% central_africa] <- "Central Africa"
mgen_meta$region[mgen_meta$country %in% se_asia] <- "SE Asia"

field_isolates <- mgen_meta %>% filter(!is.na(country))

knitr::kable(field_isolates %>% count(region, country, site))
region country site n
Central Africa DR of the Congo Kinshasa 113
Central Africa Malawi Chikwawa 317
Central Africa Malawi Zomba 52
SE Asia Bangladesh Ramu 50
SE Asia Cambodia Pailin 84
SE Asia Cambodia Preah Vihear 104
SE Asia Cambodia Pursat 235
SE Asia Cambodia Ratanakiri 147
SE Asia Laos Attapeu 85
SE Asia Myanmar Bago Division 60
SE Asia Thailand Mae Sot 106
SE Asia Thailand Ranong 20
SE Asia Thailand Sisakhet 22
SE Asia Vietnam Bu Dang 1
SE Asia Vietnam Bu Gia Map 64
SE Asia Vietnam Phuoc Long 32
West Africa Ghana Kassena 549
West Africa Ghana Kintampo 68
West Africa Guinea Nzerekore 100
West Africa Mali Bandiagara 9
West Africa Mali Faladje 36
West Africa Mali Kolle 51
West Africa Nigeria Ilorin 5
West Africa Senegal Thies 133
West Africa Senegal Velingara 4
West Africa The Gambia GM_Coastal 65
# add metadata to GDS file
# first reorder according to sample.id
mgen_meta <- mgen_meta %>% arrange(sample)
if(cnt.gdsn(index.gdsn(mgen, "sample.annotation")) != 4) {
    add.gdsn(index.gdsn(mgen, "sample.annotation"),
             name = "acc", val = mgen_meta$acc)
    add.gdsn(index.gdsn(mgen, "sample.annotation"), 
             name = "region", val =mgen_meta$region)
    add.gdsn(index.gdsn(mgen, "sample.annotation"), 
             name = "country",val = mgen_meta$country)
    add.gdsn(index.gdsn(mgen, "sample.annotation"),
             name = "site", val = mgen_meta$site)
}

Additional sample and SNP filtering

Prior to estimating the minor allele frequencies and obtaining a final set of SNPs and samples. We need to ensure we are confident in the SNP set we have obtained and that samples are of high-quality. Although these samples have been processed by there still might be samples that are poorly genotyped and SNVs that aren’t polymorphic globally or within the subpopulations.

We first look at the various quality metrics provided by the Haplotype Caller for variant annotated in the VCF files. Of particular interest is the QD MQRanksum, and SOR tags.

library(ggplot2)
library(grid)
theme_set(theme_bw())
gatk_df <- data.frame(variant.id = seqGetData(mgen, "variant.id"),
                      chr = seqGetData(mgen, "chromosome"),
                      mq =  seqGetData(mgen, "annotation/info/MQ"),
                      mqrank = seqGetData(mgen, "annotation/info/MQRankSum"),
                      qd = seqGetData(mgen, "annotation/info/QD"),
                      sor = seqGetData(mgen, "annotation/info/SOR"))

grid.newpage()
pushViewport(viewport(layout = grid.layout(2, 2)))

print(ggplot(gatk_df, aes(x = mq)) + stat_ecdf(),
      vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(ggplot(gatk_df, aes(x = mqrank)) + stat_ecdf(),
      vp = viewport(layout.pos.row = 1, layout.pos.col = 2))
## Warning: Removed 220140 rows containing non-finite values (stat_ecdf).
print(ggplot(gatk_df, aes(x = qd)) + stat_ecdf(),
      vp =  viewport(layout.pos.row = 2, layout.pos.col = 1))
## Warning: Removed 96 rows containing non-finite values (stat_ecdf).
print(ggplot(gatk_df, aes(x =  sor)) + stat_ecdf(),
      vp =  viewport(layout.pos.row = 2, layout.pos.col = 2))

We see that all variants are supported by high residual mean square mapping qualities over all samples. The distribution of MQRankSum scores (which is only computed at heterozygous sites) is centred at 0 with a fat right tail. For this tag only negative scores are useful since that indicates that the mapping quality for the alternate allele is low compared to the reference allele. The QD distribution is relatively symmetric and centred at around 20, most variants have relatively good ratios of average quality to average depth. The SOR tag is an odds ratio score to detect strand bias from a 2 x 2 contingency table using the strands and alt/ref allele counts as the rows and columns respectively.

We apply some hard filters first before looking at coverage distributions, * QD score > 15 * SOR < 1 * MQRankSum > -2

Next we look at the empirical cumulative distribution functions (ECDF) at:

# filter based on field samples
# filter based on qd > 20, sor > 1, mqrank > -2 
v_ids <- (gatk_df %>% filter(qd > 20, sor > 1 | is.na(sor), mqrank > -2 | is.na(mqrank)))$variant.id
seqSetFilter(mgen, sample.id = field_isolates$sample, variant.id = v_ids)
## # of selected samples: 2,512
## # of selected variants: 146,997
# sample coverage 
sample_5x <- perSiteCoverageBySNP(mgen, 5L)
sample_10x <- perSiteCoverageBySNP(mgen, 10L)
sample_20x <- perSiteCoverageBySNP(mgen, 20L)

cbbPalette <- c("#000000", "#E69F00", "#56B4E9")

plot(ecdf(sample_5x),
     col = cbbPalette[1],
     xlab = "Proportion of samples",
     ylab = "Cumulative proportion of SNPs",
     main = "")
lines(ecdf(sample_10x), col = cbbPalette[2])
lines(ecdf(sample_20x), col = cbbPalette[3])
legend('left',  c('5x', '10x', '20x'), fill=cbbPalette, border=NA)

From the ECDF coverage plots we find that more than 90% of samples have at least 90% of theirs SNPs covered to 5 bases. Moreover, 90% of samples have at least 70% of their SNPs covered to 10 bases and 90% of samples have at least 60% of their SNPs covered by 20 bases.

# snp coverage by number of samples covered up to 5x, 10x, 10x
snp_5x <- perSiteCoverageBySample(mgen, 5L)
snp_10x <- perSiteCoverageBySample(mgen, 10L)
snp_20x <- perSiteCoverageBySample(mgen, 20L)


plot(ecdf(snp_20x),
     col = cbbPalette[1], xlab = "Proportion of SNPs", 
     ylab = "Cumulative proportion of of samples",
     main = "")
lines(ecdf(snp_10x), col = cbbPalette[2])
lines(ecdf(snp_20x), col = cbbPalette[3])
legend('left', c('5x', '10x', '20x'), fill=cbbPalette, border=NA)

Similarly, from the ECDF coverage plots we see that more than 90% of the SNPs are covered to at least 5 bases in 90% of samples. While 60% of SNPs are covered up to 10 bases in 90% of samples and 30% of SNP s are covered up to 20 bases in 90% of the samples.

Since for this analysis we are more interested in evaluating known variants (i.e. true positive variants) we applied some hard filtering to this dataset:

variant_filter <- v_ids[which(snp_20x > 0.8)]


samples_to_keep <- field_isolates$sample[which(sample_5x > 0.9)]

seqSetFilter(mgen, sample.id = samples_to_keep, variant.id = variant_filter)
## # of selected samples: 2,039
## # of selected variants: 95,930

This leaves us with 2039 samples and 685596 variants. The samples segregate by region as follows:

filtered_fi <- field_isolates %>% filter(sample %in% samples_to_keep)
knitr::kable(filtered_fi %>% count(region, country))
region country n
Central Africa DR of the Congo 58
Central Africa Malawi 357
SE Asia Bangladesh 31
SE Asia Cambodia 472
SE Asia Laos 80
SE Asia Myanmar 54
SE Asia Thailand 132
SE Asia Vietnam 93
West Africa Ghana 512
West Africa Guinea 100
West Africa Mali 46
West Africa Nigeria 4
West Africa Senegal 48
West Africa The Gambia 52

And the variants segregate by chromosome as follows:

coords <- getCoordinates(mgen)

knitr::kable(coords %>% count(chromosome))
chromosome n
Pf3D7_01_v3 1932
Pf3D7_02_v3 3532
Pf3D7_03_v3 3706
Pf3D7_04_v3 3817
Pf3D7_05_v3 5133
Pf3D7_06_v3 5629
Pf3D7_07_v3 5183
Pf3D7_08_v3 5582
Pf3D7_09_v3 5084
Pf3D7_10_v3 6699
Pf3D7_11_v3 9426
Pf3D7_12_v3 9342
Pf3D7_13_v3 14352
Pf3D7_14_v3 16513

Constructing three region specific data subsets

For the IBD analysis we require filters on the variants to ensure the SNPs have high enough MAFs and to annotate samples that are putatively clonal.

To do this we perform the following steps 1. Subset the calls by geographic region defined above. 2. Estimate the \(F_{ws}\) metric to annotate samples as either single clones or multiple clone infections. We use this for a quick proxy for multiple infections, before assessing this in more detail using moimix. 3. Estimate the MAFs directly from read-count data and select variants that are polymorphic in that region.

We also save the PED and MAP file as an R objects to speed up write-out.

Central Africa

# get central africa IDs
centr_afr <- seqGetData(mgen, "sample.id")[seqGetData(mgen, "sample.annotation/region") == "Central Africa"]
# filter gds
seqSetFilter(mgen, sample.id = centr_afr, variant.id = variant_filter)

# estimate MAF
maf_ca <- getMAF(mgen)
# remove variants that are not polymorphic at all in this region (MAF == 0)
snp_ca <- variant_filter[maf_ca > 0]

hist(maf_ca[snp_ca], breaks = 100)
seqSetFilter(mgen, sample.id = centr_afr, variant.id = snp_ca)
# estimate Fws metric
fws_central_africa <- getFws(mgen)
moi_ca <- data.frame(sample = centr_afr, fws = fws_central_africa, 
                     moi = ifelse(fws_central_africa > 0.95, 1, 2))

hist(fws_central_africa, breaks = 100)

plink_ca <- extractPED(mgen, moi.estimates = moi_ca$moi)

write_rds(plink_ca, "processed_data/central_africa.rds")

seqSetFilter(mgen)

West Africa

# get central africa IDs
west_afr <- seqGetData(mgen, "sample.id")[seqGetData(mgen, "sample.annotation/region") == "West Africa"]
# filter gds
seqSetFilter(mgen, sample.id = west_afr, variant.id = variant_filter)

# estimate MAF
maf_wa <- getMAF(mgen)
# remove variants that are not polymorphic at all in this region (MAF == 0)
snp_wa <- variant_filter[maf_ca > 0]
hist(maf_wa[snp_wa], breaks = 100)

seqSetFilter(mgen, sample.id = west_afr, variant.id = snp_wa)
# estimate Fws metric
fws_west_africa <- getFws(mgen)
moi_wa <- data.frame(sample = west_afr, fws = fws_west_africa, 
                     moi = ifelse(fws_west_africa > 0.95, 1, 2))


hist(fws_west_africa, breaks = 100)

plink_wa <- extractPED(mgen, moi.estimates = moi_wa$moi)

write_rds(plink_wa, "processed_data/west_africa.rds")
seqSetFilter(mgen)

SE Asia

# get central africa IDs
se_asia <- seqGetData(mgen, "sample.id")[seqGetData(mgen, 
                                                    "sample.annotation/region") == "SE Asia"]
# filter gds
seqSetFilter(mgen, sample.id = se_asia, variant.id = variant_filter)

# estimate MAF
maf_sea <- getMAF(mgen)
# remove variants that are not polymorphic at all in this region (MAF == 0)
snp_sea <- variant_filter[maf_sea > 0]
hist(maf_sea[snp_sea], breaks = 100)

seqSetFilter(mgen, sample.id = se_asia, variant.id = snp_sea)
# estimate Fws metric
fws_se_asia <- getFws(mgen)
moi_sea <- data.frame(sample = se_asia, fws = fws_se_asia, 
                     moi = ifelse(fws_se_asia > 0.95, 1, 2))

hist(fws_se_asia, breaks = 100)

plink_sea <- extractPED(mgen, moi.estimates = moi_sea$moi)

write_rds(plink_sea, "processed_data/se_asia.rds")

seqSetFilter(mgen)

Save annotation sets

We also save the variant annotations across all populations.

seqSetFilter(mgen, variant.id = variant_filter)

annotations <- data.frame(chr = seqGetData(mgen, "chromosome"),
                          pos = seqGetData(mgen, "position"),
                          gene_name = seqGetData(mgen, "annotation/info/SNPEFF_GENE_NAME"),
                          gene_biotype = seqGetData(mgen, "annotation/info/SNPEFF_GENE_BIOTYPE"),
                          snp_effect = seqGetData(mgen, "annotation/info/SNPEFF_AMINO_ACID_CHANGE"),
                          snp_functional_class = seqGetData(mgen, "annotation/info/SNPEFF_EFFECT"), 
                          snp_aa_change = seqGetData(mgen, "annotation/info/SNPEFF_AMINO_ACID_CHANGE"),
                          snp_codon_change = seqGetData(mgen, "annotation/info/SNPEFF_CODON_CHANGE"),
                          exon_id = seqGetData(mgen, "annotation/info/SNPEFF_EXON_ID"),
                          transcript_id = seqGetData(mgen, "annotation/info/SNPEFF_TRANSCRIPT_ID"))

# counts
knitr::kable(annotations %>% 
                 count(snp_functional_class, gene_biotype))

write_rds(annotations, "processed_data/snp_annotations_clean.rds")

Extracting mixture data

To construct polymorphic markers for the known mixture strains we extract a set of high MAF and high heterzygous set of global markers from the global field isolate sets.

seqSetFilter(mgen, sample.id = samples_to_keep, variant.id = variant_filter)
## # of selected samples: 2,039
## # of selected variants: 95,930
maf_all <- getMAF(mgen)

het_all <- getHeterozygosity(mgen)

hist(maf_all, breaks = 50)
hist(het_all, breaks = 50)

We see that as observed in the other populations the MAF distribution is biased to rare alleles. If we choose SNVs that are heterozygous, this should give us a cleaner signal for detecting MOI.

mix_inform_snps <- variant_filter[maf_all > 0.05 & het_all > 0.05]

# read in mixture metadata
mixture <- read_rds("processed_data/mixtures_all.rds")

seqSetFilter(mgen, variant.id = mix_inform_snps, sample.id = mixture$sample)
## # of selected samples: 32
## # of selected variants: 332

We also plot the BAF frequencies to see if the signal remains

bf <- bafMatrix(mgen)

for(sample in mixture$sample) {
    plot(bf, sample)
}

Finally, we export the mixture data to a GDS file and make it available to moimix.

seqExport(mgen, "processed_data/mixtures_clean.gds")
## Export to 'processed_data/mixtures_clean.gds'
##     sample.id (32)  [md5: de9681146247db2f63aa94d6ec8e4de0]
##     variant.id (332)  [md5: abe582b4135d21d9752eec774c2408f0]
##     position  [md5: c4fa4b62b01c7512b7f90c545d0c9a44]
##     chromosome  [md5: e63b8c5a5dea28f3bcf241ff178d58c6]
##     allele  [md5: b369800c15e1c7ee87a0c4ea1df61dc0]
##     genotype  [md5: 6ec6424cc91dddbd4af767f210d0fac4]
##     annotation/id  [md5: 4167db6bc45ce4e77e3817662cb8af24]
##     annotation/qual  [md5: 1a249b55742259496862210a8d1fd7a7]
##     annotation/filter  [md5: 452632275d189b9139b31a0fc8ec1904]
##     annotation/info/AC  [md5: ac5c2ed33cdc9d4e921d8d7256fe63f5]
##     annotation/info/AF  [md5: ba998a009e91659f1ddba7d1069a7d8c]
##     annotation/info/AN  [md5: d475eacd7cefa3abcc71b069c26a80d0]
##     annotation/info/BaseQRankSum  [md5: d932638a2f6ddd958807f723d99da2f5]
##     annotation/info/ClippingRankSum  [md5: 0ceb19108b5ea40d766b9f54af371ede]
##     annotation/info/DP  [md5: fa8a2d112de258653ba841597424eefc]
##     annotation/info/FS  [md5: 7c42a8af54e66b5f5b0c485ca6a8de60]
##     annotation/info/InbreedingCoeff  [md5: 45c89a3ecfc3eb8619479e29c004c084]
##     annotation/info/MQ  [md5: 22668bba748cb13fb2e7237bce6de3b7]
##     annotation/info/MQRankSum  [md5: 149210eeed040c5e8b8a453e9fd00c7a]
##     annotation/info/QD  [md5: dfc958bc1596ce673887151bf02c3102]
##     annotation/info/ReadPosRankSum  [md5: 4a9db88657f86651830000feb3280c13]
##     annotation/info/SNPEFF_AMINO_ACID_CHANGE  [md5: 18ce138e7c1912d67afa9169aabe5c80]
##     annotation/info/SNPEFF_CODON_CHANGE  [md5: e4ff41072e6f819505a247496608e56f]
##     annotation/info/SNPEFF_EFFECT  [md5: 1fd46f82eb58e4d0b9a7b69296ccb7b4]
##     annotation/info/SNPEFF_EXON_ID  [md5: dabfa9f11bf3b233454d1bd8816e5bfa]
##     annotation/info/SNPEFF_FUNCTIONAL_CLASS  [md5: 202aff782d171916a59b541c6c86c17e]
##     annotation/info/SNPEFF_GENE_BIOTYPE  [md5: 82b08ba2326218b8b15334edfc738be4]
##     annotation/info/SNPEFF_GENE_NAME  [md5: 7bd377ee329837f7898d184f58a1c038]
##     annotation/info/SNPEFF_IMPACT  [md5: 7cc082673c8174acb9f0dd470b8948d6]
##     annotation/info/SNPEFF_TRANSCRIPT_ID  [md5: 6766549ec127aeeceff9ae22017b9618]
##     annotation/info/SOR  [md5: 27a5f10aa995a0f0fdbe4b90a7403f66]
##     annotation/info/VQSLOD  [md5: a452f4771d84e9ffe37d285ef7593a5b]
##     annotation/format/AD  [md5: 60d3d3dd6f69149b001bacc0e8e8f0c5]
##     annotation/format/DP  [md5: 0308f4b34f717fe3e0b40cfec068f3d3]
##     annotation/format/GQ  [md5: d59ed03b4cf8db88f51d2c7cc13c1109]
##     annotation/format/PGT  [md5: 6f6399fb59ffc0c917fe280e0755dad8]
##     annotation/format/PID  [md5: 52d6b9f42360c7c6f6b7f84098750eb5]
##     annotation/format/PL  [md5: 6893e32cc68f5607057c465c272d8408]
##     sample.annotation/acc  [md5: beac0a46021a580ebac39d24a8d25872]
##     sample.annotation/region  [md5: 70bc8f4b72a86921468bf8e8441dce51]
##     sample.annotation/country  [md5: 70bc8f4b72a86921468bf8e8441dce51]
##     sample.annotation/site  [md5: 70bc8f4b72a86921468bf8e8441dce51]
## Done.
## Optimize the access efficiency ...
## Clean up the fragments of GDS file:
##     open the file 'processed_data/mixtures_clean.gds' (124.5K)
##     # of fragments: 277
##     save to 'processed_data/mixtures_clean.gds.tmp'
##     rename 'processed_data/mixtures_clean.gds.tmp' (122.9K, reduced: 1.6K)
##     # of fragments: 141

Appendix

# tidy up gds
seqSetFilter(mgen)
## # of selected samples: 2,640
## # of selected variants: 1,057,870
seqClose(mgen)
# cleanup.gds("processed_data/malaria_gen5.gds")

sessionInfo()
## R version 3.3.0 (2016-05-03)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.9.5 (Mavericks)
## 
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] ggplot2_2.1.0     readr_0.2.2       moimix_0.0.1.9001 flexmix_2.3-13   
## [5] lattice_0.20-33   dplyr_0.4.3       SeqArray_1.12.3   gdsfmt_1.8.2     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.5                Rsamtools_1.24.0          
##  [3] Biostrings_2.40.0          assertthat_0.1            
##  [5] digest_0.6.9               R6_2.1.2                  
##  [7] GenomeInfoDb_1.8.1         plyr_1.8.3                
##  [9] MatrixModels_0.4-1         stats4_3.3.0              
## [11] RSQLite_1.0.0              evaluate_0.9              
## [13] coda_0.18-1                httr_1.1.0                
## [15] highr_0.6                  zlibbioc_1.18.0           
## [17] GenomicFeatures_1.24.2     lazyeval_0.1.10           
## [19] curl_0.9.7                 SparseM_1.7               
## [21] S4Vectors_0.10.0           Matrix_1.2-6              
## [23] rmarkdown_0.9.6            labeling_0.3              
## [25] devtools_1.11.1            BiocParallel_1.6.2        
## [27] stringr_1.0.0              GWASExactHW_1.01          
## [29] RCurl_1.95-4.8             biomaRt_2.28.0            
## [31] munsell_0.4.3              rtracklayer_1.32.0        
## [33] BiocGenerics_0.18.0        mcmc_0.9-4                
## [35] htmltools_0.3.5            nnet_7.3-12               
## [37] SummarizedExperiment_1.2.2 IRanges_2.6.0             
## [39] XML_3.98-1.4               crayon_1.3.1              
## [41] withr_1.0.1                GenomicAlignments_1.8.0   
## [43] MASS_7.3-45                bitops_1.0-6              
## [45] gtable_0.2.0               DBI_0.4-1                 
## [47] git2r_0.15.0               magrittr_1.5              
## [49] formatR_1.4                SeqVarTools_1.10.0        
## [51] scales_0.4.0               stringi_1.0-1             
## [53] XVector_0.12.0             RColorBrewer_1.1-2        
## [55] tools_3.3.0                BSgenome_1.40.0           
## [57] Biobase_2.32.0             parallel_3.3.0            
## [59] yaml_2.1.13                AnnotationDbi_1.34.2      
## [61] colorspace_1.2-6           GenomicRanges_1.24.0      
## [63] memoise_1.0.0              logistf_1.21              
## [65] knitr_1.13                 VariantAnnotation_1.18.1  
## [67] quantreg_5.24              MCMCpack_1.3-6            
## [69] modeltools_0.2-21